/*******************************************************************************					

	DESCRIPTION: 	This do-file appends the yearly wage files (1990-2017) from all
					sectors and builds a person-year dataset of lagged wage variables.

*******************************************************************************/

* Start from a clean session
clear all
* Prefix used in the file names of every intermediate dataset saved below
global id_code 001_5

********************************************************************************
* A1: Combine data from all years
********************************************************************************

* Base file: central government wages for 1990 (source code 1).
* `year' and `dataset' are created here; rows appended later get them filled
* in wherever they are still missing after the append.
use "${wages}/centralgovt_wages1990", clear
generate year = 1990
generate dataset = 1

* Remaining central government years
forvalues yr = 1991/2017 {
	display("`yr'")
	append using "${wages}/centralgovt_wages`yr'.dta"
	replace year = `yr' if year == .
	replace dataset = 1 if dataset == .
}

* Other sources, coded in list order:
*   2 = localgovt, 3 = private_bluecollar, 4 = private_whitecollar, 5 = regionalgovt
local srccode = 1
foreach src in localgovt private_bluecollar private_whitecollar regionalgovt {
	local ++srccode
	forvalues yr = 1990/2017 {
		display("`yr'")

		* ArbTidArt is converted to numeric in memory right before the
		* 1996 blue-collar file is appended
		if "`src'" == "private_bluecollar" & "`yr'" == "1996" {
			destring ArbTidArt, replace
		}

		append using "${wages}/`src'_wages`yr'.dta"
		replace year = `yr' if year == .
		replace dataset = `srccode' if dataset == .
	}
}

* Remove exact duplicate rows and shrink storage types before saving
duplicates drop
compress

save "${data_intermediate}/${id_code}_Wages_AllYears.dta", replace

********************************************************************************
* A2: Keep and clean relevant variables
********************************************************************************

use "${data_intermediate}/${id_code}_Wages_AllYears.dta", clear

keep LopNr_PersonNr LopNr_PeOrgNr Manl ovktid ovktidh Tjomf year dataset

* Rename variables
* The full-time hours variable is taken from ovktidh. Renaming ovktid twice
* (as was done before) aborts with "variable ovktid not found", because the
* first rename has already consumed that name.
* NOTE(review): the mapping ovktid -> hours, ovktidh -> hoursFullTime is
* inferred from the variable names; confirm against the data codebook.
rename LopNr_PeOrgNr firm
rename Manl monthlyWage
rename ovktid hours
rename ovktidh hoursFullTime
rename Tjomf percenFullTime

* Keep the highest wage in a year unless missing
* Descending sort puts missing wages first within a person-year, so rows with
* missing values are dropped (only when the person-year has more than one row)
* before the first remaining row is kept.
gsort year LopNr_PersonNr -monthlyWage
by year LopNr_PersonNr: drop if monthlyWage==. & _N>1 //24 940 dropped
by year LopNr_PersonNr: drop if percenFullTime==. & _N>1 // 242 128 dropped
by year LopNr_PersonNr: drop if hoursFullTime==. & _N>1 // 2 893 436 dropped 
duplicates drop year LopNr_PersonNr, force // 268 392 observations deleted

* Impute the percent of hours worked for individuals in the private sector
* we take 40 hours as a full time week in Sweden
* NOTE(review): the imputation divides hoursFullTime by 40; verify that this
* is the intended hours variable (rather than `hours') once the rename
* mapping above has been confirmed.
gen percenFullTimeNew=percenFullTime
gen percenFullTimeMiss=(percenFullTime==.) 
replace percenFullTimeNew=hoursFullTime/40*100 if percenFullTimeMiss==1 //2.8 million replaced

* Temporary save: this file is merged back below and then overwritten by the
* final save at the end of this section.
save "${data_intermediate}/${id_code}_Wages_clean.dta", replace

* Generate a variable for every person in every year in the dataset
* Only LopNr_PersonNr matches Lop* here (LopNr_PeOrgNr was renamed to firm).
* Each person is expanded to 28 rows, numbered 1990-2017.
keep Lop*
duplicates drop
expand 28
bysort Lop*: gen year = _n + 1989
merge 1:1 Lop* year using "${data_intermediate}/${id_code}_Wages_clean.dta"
drop _merge

* Generate lag variables (one-year lag of each variable)
xtset LopNr_PersonNr year
foreach var in monthlyWage percenFullTimeNew percenFullTimeMiss {
	gen L1_`var'=L1.`var'
}

keep LopNr_PersonNr L1_monthlyWage L1_percenFullTimeNew L1_percenFullTimeMiss year

save "${data_intermediate}/${id_code}_Wages_clean.dta", replace


